import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from yellowbrick.cluster import KElbowVisualizer
from sklearn.metrics import silhouette_score, davies_bouldin_score
# Read heart_clean.csv into the working DataFrame and display it.
csv_path = "heart_clean.csv"
df = pd.read_csv(csv_path)
df
| index | age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | age_bin | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 52 | 1 | 0 | 125 | 212.0 | 0 | 1 | 168 | 0 | 1.0 | 2 | 2 | 3 | 0 | (50, 60] |
| 1 | 1 | 53 | 1 | 0 | 140 | 203.0 | 1 | 0 | 155 | 1 | 3.1 | 0 | 0 | 3 | 0 | (50, 60] |
| 2 | 2 | 70 | 1 | 0 | 145 | 174.0 | 0 | 1 | 125 | 1 | 2.6 | 0 | 0 | 3 | 0 | (60, 70] |
| 3 | 3 | 61 | 1 | 0 | 148 | 203.0 | 0 | 1 | 161 | 0 | 0.0 | 2 | 1 | 3 | 0 | (60, 70] |
| 4 | 4 | 62 | 0 | 0 | 138 | 294.0 | 1 | 1 | 106 | 0 | 1.9 | 1 | 3 | 2 | 0 | (60, 70] |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 297 | 723 | 68 | 0 | 2 | 120 | 211.0 | 0 | 0 | 115 | 0 | 1.5 | 1 | 0 | 2 | 1 | (60, 70] |
| 298 | 733 | 44 | 0 | 2 | 108 | 141.0 | 0 | 1 | 175 | 0 | 0.6 | 1 | 0 | 2 | 1 | (40, 50] |
| 299 | 739 | 52 | 1 | 0 | 128 | 255.0 | 0 | 1 | 161 | 1 | 0.0 | 2 | 1 | 3 | 0 | (50, 60] |
| 300 | 843 | 59 | 1 | 3 | 160 | 273.0 | 0 | 0 | 125 | 0 | 0.0 | 2 | 0 | 2 | 0 | (50, 60] |
| 301 | 878 | 54 | 1 | 0 | 120 | 188.0 | 0 | 1 | 113 | 0 | 1.4 | 1 | 1 | 3 | 0 | (50, 60] |
302 rows × 16 columns
# Remove the age_bin column from the working frame in place.
df.drop("age_bin", axis=1, inplace=True)

# Report the smallest and largest age present in the data.
youngest, oldest = df["age"].min(), df["age"].max()
f"Usia Minimal = {youngest}, Usia Maksimal = {oldest}"
'Usia Minimal = 29, Usia Maksimal = 77'
# Number of rows for each value of the target column.
target_counts = df.groupby("target")["target"].count()
target_counts.reset_index(name="total_target")
| target | total_target | |
|---|---|---|
| 0 | 0 | 138 |
| 1 | 1 | 164 |
# Decade-wide age bins: edges 20, 30, ..., 80 produce the groups
# "20-29", "30-39", ..., "70-79".
age_bins = range(20, 90, 10)
age_labels = [f"{start}-{start + 9}" for start in age_bins[:-1]]

# right=False makes every interval closed on the left, i.e. [start, start + 10),
# so an age of exactly 30 lands in "30-39".
df['age_group'] = pd.cut(df['age'], bins=age_bins, labels=age_labels, right=False)

# Count rows per (age group, target) pair. observed=False is passed explicitly:
# it keeps (age group, target) combinations that have no rows as zero counts,
# so they still take part in the average below, and it silences the pandas
# FutureWarning about the changing default.
age_target_avg = (
    df.groupby(["age_group", "target"], observed=False)["target"]
    .count()
    .reset_index(name='total_age_category')
)

# Keep only the heart-disease rows (target == 1) and average over age groups.
age_target_avg = age_target_avg[age_target_avg["target"] == 1]
age_target_avg = age_target_avg.total_age_category.mean()
f'Nilai rata-rata dari pengidap serangan jantung di setiap usia {round(age_target_avg,0)}'
/tmp/ipykernel_3504/3424322441.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. age_target_avg = df.groupby(["age_group","target"])["target"].count().reset_index(name='total_age_category')
'Nilai rata-rata dari pengidap serangan jantung di setiap usia 27.0'
import plotly.graph_objects as go

# Whole-number average of heart-disease cases per age group. Taken from
# age_target_avg (computed earlier in the notebook) instead of a hard-coded
# 27, so the reference line and its label follow the data.
avg_cases = int(round(age_target_avg))

# Bin the histogram on the same edges as df['age_group'] so the bars show
# exactly the groups the average was computed over.
age_xbins = dict(
    start=age_bins[0],
    end=age_bins[-1],
    size=age_bins[1] - age_bins[0],
)

fig = go.Figure()

# Grey bars: rows without heart disease (target == 0).
fig.add_trace(go.Histogram(
    x=df[df['target'] == 0]['age'],
    marker=dict(color='grey'),
    name='No Heart Disease',
    xbins=age_xbins
))

# Red bars: rows with heart disease (target == 1).
fig.add_trace(go.Histogram(
    x=df[df['target'] == 1]['age'],
    marker=dict(color='#C62E2E'),
    name='Heart Disease',
    xbins=age_xbins
))

# Dashed horizontal line at the average count, spanning the full plot width
# (x in paper coordinates, y in data coordinates).
fig.add_shape(
    type="line",
    x0=0, x1=1, y0=avg_cases, y1=avg_cases,
    xref="paper", yref="y",
    line=dict(color="#4A4947", width=2, dash="dash")
)

# Layout: bold left-aligned title, one x tick per decade, hidden y tick labels.
fig.update_layout(
    title=dict(
        text="Heart Disease Count by Age",
        font=dict(size=20, color="black", family="Arial", weight="bold"),
        x=0.08,
        xanchor="left"
    ),
    xaxis=dict(
        title="Age",
        title_font=dict(size=14, weight="bold"),
        tickmode="linear",
        dtick=10,
        showline=True,
        linecolor="black",
        linewidth=2
    ),
    yaxis=dict(
        title=None,
        showline=True,
        showticklabels=False,
        linewidth=2,
    ),
    plot_bgcolor="white",
    bargap=0.2,
    barmode='group'
)

# Same hover text for both traces.
fig.update_traces(
    hovertemplate='Age Range: %{x}<br>Count: %{y}',
)

# Narrative call-out above the plot.
# NOTE(review): this text is static; re-check the "40 to 69" claim whenever
# the underlying data changes.
fig.add_annotation(
    text="The age range of <b>40 to 69 is the most likely<br>age for an individual to experience a heart <br>attack</b>, as the number of heart attack cases<br>in this age group is above the average of<br>other age groups.",
    xref='paper',
    yref='paper',
    x=0.335,
    y=1.1,
    showarrow=False,
    xanchor="right",
    font=dict(size=12, color='black'),
    align='left'
)

# Label for the average line. It is anchored to the line's y value in data
# coordinates, so it stays on the line if the average changes.
fig.add_annotation(
    text=f'<b>Heart Disease avg ({avg_cases})</b> ',
    xref='paper',
    yref='y',
    x=1.03,
    y=avg_cases,
    showarrow=False,
    xanchor="right",
    font=dict(size=12, color='#4A4947'),
    align='left'
)

fig.show()
# Rows per (age group, sex) pair, pivoted so each sex code becomes a column.
# observed=False is explicit to keep empty age groups as zero rows and to
# silence the pandas FutureWarning about the changing default.
pyramid_data = (
    df.groupby(['age_group', 'sex'], observed=False)
    .size()
    .unstack(fill_value=0)
    # Relabel by code rather than by position so the names cannot be swapped
    # if the column order ever differs (0 -> Female, 1 -> Male).
    .rename(columns={0: 'Female', 1: 'Male'})
    .rename_axis(columns=None)
)
pyramid_data
/tmp/ipykernel_3504/985610881.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
| Female | Male | |
|---|---|---|
| age_group | ||
| 20-29 | 0 | 1 |
| 30-39 | 5 | 9 |
| 40-49 | 19 | 53 |
| 50-59 | 34 | 91 |
| 60-69 | 33 | 47 |
| 70-79 | 5 | 5 |
# Count rows by age group and sex. observed=False is explicit to keep empty
# age groups as zero rows and to silence the pandas FutureWarning.
pyramid_data = (
    df.groupby(['age_group', 'sex'], observed=False)
    .size()
    .unstack(fill_value=0)
    # Relabel by code, not by position (0 -> Female, 1 -> Male).
    .rename(columns={0: 'Female', 1: 'Male'})
    .rename_axis(columns=None)
)

# Female counts are negated so their bars extend to the left of the axis.
pyramid_data['Female'] = -pyramid_data['Female']

total_females = pyramid_data['Female'].abs().sum()
total_males = pyramid_data['Male'].sum()

# Male share in percent. Scaling before rounding avoids float artefacts such
# as 56.99999999999999 that round(ratio, 2) * 100 can produce.
male_share_pct = round(total_males / (total_males + total_females) * 100)

fig = go.Figure()

# Female bars (left side, negative x). The hover shows the positive count
# via customdata.
fig.add_trace(go.Bar(
    y=pyramid_data.index,
    x=pyramid_data['Female'],
    name='Female',
    orientation='h',
    marker=dict(color='salmon'),
    customdata=pyramid_data['Female'].abs().tolist(),
    hovertemplate='Age Range: %{y}<br>Count: %{customdata}'
))

# Male bars (right side).
fig.add_trace(go.Bar(
    y=pyramid_data.index,
    x=pyramid_data['Male'],
    name='Male',
    orientation='h',
    marker=dict(color='teal'),
    hovertemplate='Age Range: %{y}<br>Count: %{x}'
))

# Layout: x tick labels are hidden because the left-side values are negative.
fig.update_layout(
    title='Number of Sample by Age and Sex',
    xaxis=dict(
        title=None,
        showline=True,
        showticklabels=False,
    ),
    yaxis=dict(
        title='Age Group',
        categoryorder='category ascending'
    ),
    barmode='relative',
    bargap=0.12,
    plot_bgcolor='white',
    showlegend=True
)

# Summary annotation above the plot.
fig.add_annotation(
    text=f'Total sample <b>male domination: {male_share_pct:.1f}%</b> '
         f'with females: {total_females} and males: {total_males}',
    xref='paper',
    yref='paper',
    x=0.45,
    y=1.1,
    showarrow=False,
    xanchor="right",
    font=dict(size=12, color='black'),
    align='left'
)
fig.show()
/tmp/ipykernel_3504/3835416259.py:2: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
# Narrow the frame to the columns used for the sex / target breakdown and
# preview the first five rows.
gender_columns = ["sex", "target", "age_group"]
gender = df[gender_columns]
gender.head(5)
| sex | target | age_group | |
|---|---|---|---|
| 0 | 1 | 0 | 50-59 |
| 1 | 1 | 0 | 50-59 |
| 2 | 1 | 0 | 70-79 |
| 3 | 1 | 0 | 60-69 |
| 4 | 0 | 0 | 60-69 |
# One-hot encode target and sex, keeping age_group as the grouping key.
gender = df[["sex", "target", "age_group"]]
one_hot_encoded_data = pd.get_dummies(gender, columns=['target', 'sex'])

# Number of target_0 rows per age group. sum() adds up the indicator values;
# count() would only count non-null cells and therefore return the size of
# each age group regardless of target. observed=False is explicit to keep
# empty age groups and silence the pandas FutureWarning.
(
    one_hot_encoded_data
    .groupby(["age_group"], observed=False)["target_0"]
    .sum()
    .reset_index()
)
/tmp/ipykernel_3504/904798733.py:3: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
| age_group | target_0 | |
|---|---|---|
| 0 | 20-29 | 1 |
| 1 | 30-39 | 14 |
| 2 | 40-49 | 72 |
| 3 | 50-59 | 125 |
| 4 | 60-69 | 80 |
| 5 | 70-79 | 10 |
# List the column names of the one-hot encoded frame.
one_hot_encoded_data.columns
Index(['age_group', 'target_0', 'target_1', 'sex_0', 'sex_1'], dtype='object')
# Label for each code stored in df['sex'].
sex_mapping = {0: 'Female', 1: 'Male'}

# Derive the sex_category column from the sex codes and preview the result.
sex_labels = df['sex'].map(sex_mapping)
df['sex_category'] = sex_labels
df.head()
| index | age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | age_group | sex_category | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 52 | 1 | 0 | 125 | 212.0 | 0 | 1 | 168 | 0 | 1.0 | 2 | 2 | 3 | 0 | 50-59 | Male |
| 1 | 1 | 53 | 1 | 0 | 140 | 203.0 | 1 | 0 | 155 | 1 | 3.1 | 0 | 0 | 3 | 0 | 50-59 | Male |
| 2 | 2 | 70 | 1 | 0 | 145 | 174.0 | 0 | 1 | 125 | 1 | 2.6 | 0 | 0 | 3 | 0 | 70-79 | Male |
| 3 | 3 | 61 | 1 | 0 | 148 | 203.0 | 0 | 1 | 161 | 0 | 0.0 | 2 | 1 | 3 | 0 | 60-69 | Male |
| 4 | 4 | 62 | 0 | 0 | 138 | 294.0 | 1 | 1 | 106 | 0 | 1.9 | 1 | 3 | 2 | 0 | 60-69 | Female |
# Row count for every (sex, target) pair.
sex_percentage = (
    df.groupby(["sex_category", "target"])["target"]
    .count()
    .reset_index(name='total_target')
)

# Total rows per sex, broadcast back onto every row of that sex.
total_counts = sex_percentage.groupby('sex_category')['total_target'].transform('sum')

# Share of each target value within its sex, as a whole-number percent.
# Scale to percent *before* rounding: round(ratio, 2) * 100 leaves float
# artefacts such as 56.99999999999999, which int() would later truncate to 56.
sex_percentage['percentage'] = (sex_percentage['total_target'] / total_counts * 100).round()
sex_percentage
| sex_category | target | total_target | percentage | |
|---|---|---|---|---|
| 0 | Female | 0 | 24 | 25.0 |
| 1 | Female | 1 | 72 | 75.0 |
| 2 | Male | 0 | 114 | 55.0 |
| 3 | Male | 1 | 92 | 45.0 |
import plotly.graph_objects as go

# Bar colours: grey for "no heart disease", light red for "heart disease",
# and a saturated red for the category with the highest heart-disease share.
color_target_0 = '#c5c5c5'
color_target_1 = '#ff6b6b'
highlight_color = '#ff0000'

# Split the percentage table into one frame per target value.
no_heart_disease = sex_percentage[sex_percentage['target'] == 0]
heart_disease = sex_percentage[sex_percentage['target'] == 1]

# Category with the largest heart-disease percentage
# (None when there are no heart-disease rows at all).
if heart_disease.empty:
    max_category = None
else:
    max_index = heart_disease['percentage'].idxmax()
    max_category = heart_disease.loc[max_index, 'sex_category']

# One colour per heart-disease bar; the top category is highlighted.
heart_disease_colors = [
    highlight_color if category == max_category else color_target_1
    for category in heart_disease['sex_category']
]

fig = go.Figure()

# Labels use ":.0f" (round to nearest) rather than int() (truncate), so a
# percentage stored as 56.99999999999999 is shown as 57%, not 56%.

# 'No Heart Disease' segment of each stacked bar.
fig.add_trace(go.Bar(
    y=no_heart_disease['sex_category'],
    x=no_heart_disease['percentage'],
    name='No Heart Disease',
    orientation='h',
    marker=dict(color=color_target_0),
    text=no_heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}%"),
    textposition='inside',
    textfont=dict(size=15),
    hoverinfo='text',
    hovertext=no_heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}% No Heart Disease")
))

# 'Heart Disease' segment, coloured per category.
fig.add_trace(go.Bar(
    y=heart_disease['sex_category'],
    x=heart_disease['percentage'],
    name='Heart Disease',
    orientation='h',
    marker=dict(color=heart_disease_colors),
    text=heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}%"),
    textposition='inside',
    textfont=dict(size=15),
    hoverinfo='text',
    hovertext=heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}% Heart Disease")
))

# Layout: stacked bars on a fixed 0-100 % axis, legend centred below the plot.
fig.update_layout(
    title=dict(
        text="Distribution of Heart Disease by Gender",
        font=dict(size=24, color="black", family="Arial", weight="bold"),
        x=0,
        xanchor="left"
    ),
    xaxis=dict(
        title=None,
        tickvals=[0, 20, 40, 60, 80, 100],
        ticktext=['0%', '20%', '40%', '60%', '80%', '100%'],
        range=[0, 100],
        tickfont=dict(size=15)
    ),
    yaxis=dict(
        title=None,
        tickfont=dict(size=15)
    ),
    barmode='stack',
    plot_bgcolor="white",
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.3,
        xanchor="center",
        x=0.5,
        traceorder="normal",
        font=dict(size=15),
        itemclick="toggleothers",
    )
)
fig.show()
# Label for each chest-pain code stored in df['cp']; the position in the list
# is the code (0, 1, 2, 3).
cp_mapping = dict(enumerate([
    'Typical angina',
    'Atypical angina',
    'Non-anginal pain',
    'Asymptomatic',
]))

# Derive the cp_category column from the cp codes and display the frame.
cp_labels = df['cp'].map(cp_mapping)
df['cp_category'] = cp_labels
df
| index | age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | age_group | sex_category | cp_category | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 52 | 1 | 0 | 125 | 212.0 | 0 | 1 | 168 | 0 | 1.0 | 2 | 2 | 3 | 0 | 50-59 | Male | Typical angina |
| 1 | 1 | 53 | 1 | 0 | 140 | 203.0 | 1 | 0 | 155 | 1 | 3.1 | 0 | 0 | 3 | 0 | 50-59 | Male | Typical angina |
| 2 | 2 | 70 | 1 | 0 | 145 | 174.0 | 0 | 1 | 125 | 1 | 2.6 | 0 | 0 | 3 | 0 | 70-79 | Male | Typical angina |
| 3 | 3 | 61 | 1 | 0 | 148 | 203.0 | 0 | 1 | 161 | 0 | 0.0 | 2 | 1 | 3 | 0 | 60-69 | Male | Typical angina |
| 4 | 4 | 62 | 0 | 0 | 138 | 294.0 | 1 | 1 | 106 | 0 | 1.9 | 1 | 3 | 2 | 0 | 60-69 | Female | Typical angina |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 297 | 723 | 68 | 0 | 2 | 120 | 211.0 | 0 | 0 | 115 | 0 | 1.5 | 1 | 0 | 2 | 1 | 60-69 | Female | Non-anginal pain |
| 298 | 733 | 44 | 0 | 2 | 108 | 141.0 | 0 | 1 | 175 | 0 | 0.6 | 1 | 0 | 2 | 1 | 40-49 | Female | Non-anginal pain |
| 299 | 739 | 52 | 1 | 0 | 128 | 255.0 | 0 | 1 | 161 | 1 | 0.0 | 2 | 1 | 3 | 0 | 50-59 | Male | Typical angina |
| 300 | 843 | 59 | 1 | 3 | 160 | 273.0 | 0 | 0 | 125 | 0 | 0.0 | 2 | 0 | 2 | 0 | 50-59 | Male | Asymptomatic |
| 301 | 878 | 54 | 1 | 0 | 120 | 188.0 | 0 | 1 | 113 | 0 | 1.4 | 1 | 1 | 3 | 0 | 50-59 | Male | Typical angina |
302 rows × 18 columns
# Show the distinct chest-pain labels present after mapping.
df["cp_category"].unique()
array(['Typical angina', 'Atypical angina', 'Non-anginal pain',
'Asymptomatic'], dtype=object)
# Row count for every (chest-pain type, target) pair.
cp_percentage = (
    df.groupby(["cp_category", "target"])["target"]
    .count()
    .reset_index(name='total_target')
)

# Total rows per chest-pain type, broadcast back onto every row of that type.
total_counts = cp_percentage.groupby('cp_category')['total_target'].transform('sum')

# Share of each target value within its chest-pain type, as a whole-number
# percent. Scale to percent *before* rounding: round(ratio, 2) * 100 leaves
# float artefacts such as 56.99999999999999, which int() would truncate to 56.
cp_percentage['percentage'] = (cp_percentage['total_target'] / total_counts * 100).round()
cp_percentage
| cp_category | target | total_target | percentage | |
|---|---|---|---|---|
| 0 | Asymptomatic | 0 | 7 | 30.0 |
| 1 | Asymptomatic | 1 | 16 | 70.0 |
| 2 | Atypical angina | 0 | 9 | 18.0 |
| 3 | Atypical angina | 1 | 41 | 82.0 |
| 4 | Non-anginal pain | 0 | 18 | 21.0 |
| 5 | Non-anginal pain | 1 | 68 | 79.0 |
| 6 | Typical angina | 0 | 104 | 73.0 |
| 7 | Typical angina | 1 | 39 | 27.0 |
import plotly.graph_objects as go

# Bar colours: grey for "no heart disease", light red for "heart disease",
# and a saturated red for the category with the highest heart-disease share.
color_target_0 = '#c5c5c5'
color_target_1 = '#ff6b6b'
highlight_color = '#ff0000'

# Split the percentage table into one frame per target value.
no_heart_disease = cp_percentage[cp_percentage['target'] == 0]
heart_disease = cp_percentage[cp_percentage['target'] == 1]

# Category with the largest heart-disease percentage
# (None when there are no heart-disease rows at all).
if heart_disease.empty:
    max_category = None
else:
    max_index = heart_disease['percentage'].idxmax()
    max_category = heart_disease.loc[max_index, 'cp_category']

# One colour per heart-disease bar; the top category is highlighted.
heart_disease_colors = [
    highlight_color if category == max_category else color_target_1
    for category in heart_disease['cp_category']
]

fig = go.Figure()

# Labels use ":.0f" (round to nearest) rather than int() (truncate), so a
# percentage stored as 56.99999999999999 is shown as 57%, not 56%.

# 'No Heart Disease' segment of each stacked bar.
fig.add_trace(go.Bar(
    y=no_heart_disease['cp_category'],
    x=no_heart_disease['percentage'],
    name='No Heart Disease',
    orientation='h',
    marker=dict(color=color_target_0),
    text=no_heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}%"),
    textposition='inside',
    textfont=dict(size=15),
    hoverinfo='text',
    hovertext=no_heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}% No Heart Disease")
))

# 'Heart Disease' segment, coloured per category.
fig.add_trace(go.Bar(
    y=heart_disease['cp_category'],
    x=heart_disease['percentage'],
    name='Heart Disease',
    orientation='h',
    marker=dict(color=heart_disease_colors),
    text=heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}%"),
    textposition='inside',
    textfont=dict(size=15),
    hoverinfo='text',
    hovertext=heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}% Heart Disease")
))

# Layout: stacked bars on a fixed 0-100 % axis, legend centred below the plot.
fig.update_layout(
    title=dict(
        text="Distribution of Heart Disease by Chest Pain Type",
        font=dict(size=24, color="black", family="Arial", weight="bold"),
        x=0,
        xanchor="left"
    ),
    xaxis=dict(
        title=None,
        tickvals=[0, 20, 40, 60, 80, 100],
        ticktext=['0%', '20%', '40%', '60%', '80%', '100%'],
        range=[0, 100],
        tickfont=dict(size=15)
    ),
    yaxis=dict(
        title=None,
        tickfont=dict(size=15)
    ),
    barmode='stack',
    plot_bgcolor="white",
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.3,
        xanchor="center",
        x=0.5,
        traceorder="normal",
        font=dict(size=15),
        itemclick="toggleothers",
    )
)
fig.show()
# Show which distinct codes occur in the fbs column.
df["fbs"].unique()
array([0, 1])
# Label for each fasting-blood-sugar code stored in df['fbs'].
# Uses its own name (fbs_mapping) so the chest-pain mapping in cp_mapping
# is not overwritten by this cell.
fbs_mapping = {
    0: '<= 120 mg/dl',
    1: '> 120 mg/dl',
}

# Derive the fbs_category column from the fbs codes and display the frame.
df['fbs_category'] = df['fbs'].map(fbs_mapping)
df
| index | age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | age_group | sex_category | cp_category | fbs_category | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 52 | 1 | 0 | 125 | 212.0 | 0 | 1 | 168 | 0 | 1.0 | 2 | 2 | 3 | 0 | 50-59 | Male | Typical angina | <= 120 mg/dl |
| 1 | 1 | 53 | 1 | 0 | 140 | 203.0 | 1 | 0 | 155 | 1 | 3.1 | 0 | 0 | 3 | 0 | 50-59 | Male | Typical angina | > 120 mg/dl |
| 2 | 2 | 70 | 1 | 0 | 145 | 174.0 | 0 | 1 | 125 | 1 | 2.6 | 0 | 0 | 3 | 0 | 70-79 | Male | Typical angina | <= 120 mg/dl |
| 3 | 3 | 61 | 1 | 0 | 148 | 203.0 | 0 | 1 | 161 | 0 | 0.0 | 2 | 1 | 3 | 0 | 60-69 | Male | Typical angina | <= 120 mg/dl |
| 4 | 4 | 62 | 0 | 0 | 138 | 294.0 | 1 | 1 | 106 | 0 | 1.9 | 1 | 3 | 2 | 0 | 60-69 | Female | Typical angina | > 120 mg/dl |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 297 | 723 | 68 | 0 | 2 | 120 | 211.0 | 0 | 0 | 115 | 0 | 1.5 | 1 | 0 | 2 | 1 | 60-69 | Female | Non-anginal pain | <= 120 mg/dl |
| 298 | 733 | 44 | 0 | 2 | 108 | 141.0 | 0 | 1 | 175 | 0 | 0.6 | 1 | 0 | 2 | 1 | 40-49 | Female | Non-anginal pain | <= 120 mg/dl |
| 299 | 739 | 52 | 1 | 0 | 128 | 255.0 | 0 | 1 | 161 | 1 | 0.0 | 2 | 1 | 3 | 0 | 50-59 | Male | Typical angina | <= 120 mg/dl |
| 300 | 843 | 59 | 1 | 3 | 160 | 273.0 | 0 | 0 | 125 | 0 | 0.0 | 2 | 0 | 2 | 0 | 50-59 | Male | Asymptomatic | <= 120 mg/dl |
| 301 | 878 | 54 | 1 | 0 | 120 | 188.0 | 0 | 1 | 113 | 0 | 1.4 | 1 | 1 | 3 | 0 | 50-59 | Male | Typical angina | <= 120 mg/dl |
302 rows × 19 columns
# Row count for every (fasting-blood-sugar level, target) pair.
fbs_percentage = (
    df.groupby(["fbs_category", "target"])["target"]
    .count()
    .reset_index(name='total_target')
)

# Total rows per fasting-blood-sugar level, broadcast back onto every row of
# that level.
total_counts = fbs_percentage.groupby('fbs_category')['total_target'].transform('sum')

# Share of each target value within its level, as a whole-number percent.
# Scale to percent *before* rounding: round(ratio, 2) * 100 leaves float
# artefacts such as 56.99999999999999, which int() would later truncate to 56.
fbs_percentage['percentage'] = (fbs_percentage['total_target'] / total_counts * 100).round()
fbs_percentage
| fbs_category | target | total_target | percentage | |
|---|---|---|---|---|
| 0 | <= 120 mg/dl | 0 | 116 | 45.0 |
| 1 | <= 120 mg/dl | 1 | 141 | 55.0 |
| 2 | > 120 mg/dl | 0 | 22 | 49.0 |
| 3 | > 120 mg/dl | 1 | 23 | 51.0 |
import plotly.graph_objects as go

# Bar colours: grey for "no heart disease", light red for "heart disease",
# and a saturated red for the category with the highest heart-disease share.
color_target_0 = '#c5c5c5'
color_target_1 = '#ff6b6b'
highlight_color = '#ff0000'

# Split the percentage table into one frame per target value.
no_heart_disease = fbs_percentage[fbs_percentage['target'] == 0]
heart_disease = fbs_percentage[fbs_percentage['target'] == 1]

# Category with the largest heart-disease percentage
# (None when there are no heart-disease rows at all).
if heart_disease.empty:
    max_category = None
else:
    max_index = heart_disease['percentage'].idxmax()
    max_category = heart_disease.loc[max_index, 'fbs_category']

# One colour per heart-disease bar; the top category is highlighted.
heart_disease_colors = [
    highlight_color if category == max_category else color_target_1
    for category in heart_disease['fbs_category']
]

fig = go.Figure()

# Labels use ":.0f" (round to nearest) rather than int() (truncate), so a
# percentage stored as 56.99999999999999 is shown as 57%, not 56%.

# 'No Heart Disease' segment of each stacked bar.
fig.add_trace(go.Bar(
    y=no_heart_disease['fbs_category'],
    x=no_heart_disease['percentage'],
    name='No Heart Disease',
    orientation='h',
    marker=dict(color=color_target_0),
    text=no_heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}%"),
    textposition='inside',
    textfont=dict(size=15),
    hoverinfo='text',
    hovertext=no_heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}% No Heart Disease")
))

# 'Heart Disease' segment, coloured per category.
fig.add_trace(go.Bar(
    y=heart_disease['fbs_category'],
    x=heart_disease['percentage'],
    name='Heart Disease',
    orientation='h',
    marker=dict(color=heart_disease_colors),
    text=heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}%"),
    textposition='inside',
    textfont=dict(size=15),
    hoverinfo='text',
    hovertext=heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}% Heart Disease")
))

# Layout: stacked bars on a fixed 0-100 % axis, legend centred below the plot.
fig.update_layout(
    title=dict(
        text="Distribution of Heart Disease by Fasting Blood Sugar Level",
        font=dict(size=24, color="black", family="Arial", weight="bold"),
        x=0,
        xanchor="left"
    ),
    xaxis=dict(
        title=None,
        tickvals=[0, 20, 40, 60, 80, 100],
        ticktext=['0%', '20%', '40%', '60%', '80%', '100%'],
        range=[0, 100],
        tickfont=dict(size=15)
    ),
    yaxis=dict(
        title=None,
        tickfont=dict(size=15)
    ),
    barmode='stack',
    plot_bgcolor="white",
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.3,
        xanchor="center",
        x=0.5,
        traceorder="normal",
        font=dict(size=15),
        itemclick="toggleothers",
    )
)
fig.show()
# Label for each resting-ECG code stored in df['restecg'].
# Uses its own name (restecg_mapping) so the chest-pain mapping in cp_mapping
# is not overwritten by this cell.
restecg_mapping = {
    0: 'Normal',
    1: 'Having ST-T wave abnormality',
    2: 'Left ventricular hypertrophy',
}

# Derive the restecg_category column from the restecg codes and display
# the frame.
df['restecg_category'] = df['restecg'].map(restecg_mapping)
df
| index | age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | age_group | sex_category | cp_category | fbs_category | restecg_category | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 52 | 1 | 0 | 125 | 212.0 | 0 | 1 | 168 | 0 | 1.0 | 2 | 2 | 3 | 0 | 50-59 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality |
| 1 | 1 | 53 | 1 | 0 | 140 | 203.0 | 1 | 0 | 155 | 1 | 3.1 | 0 | 0 | 3 | 0 | 50-59 | Male | Typical angina | > 120 mg/dl | Normal |
| 2 | 2 | 70 | 1 | 0 | 145 | 174.0 | 0 | 1 | 125 | 1 | 2.6 | 0 | 0 | 3 | 0 | 70-79 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality |
| 3 | 3 | 61 | 1 | 0 | 148 | 203.0 | 0 | 1 | 161 | 0 | 0.0 | 2 | 1 | 3 | 0 | 60-69 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality |
| 4 | 4 | 62 | 0 | 0 | 138 | 294.0 | 1 | 1 | 106 | 0 | 1.9 | 1 | 3 | 2 | 0 | 60-69 | Female | Typical angina | > 120 mg/dl | Having ST-T wave abnormality |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 297 | 723 | 68 | 0 | 2 | 120 | 211.0 | 0 | 0 | 115 | 0 | 1.5 | 1 | 0 | 2 | 1 | 60-69 | Female | Non-anginal pain | <= 120 mg/dl | Normal |
| 298 | 733 | 44 | 0 | 2 | 108 | 141.0 | 0 | 1 | 175 | 0 | 0.6 | 1 | 0 | 2 | 1 | 40-49 | Female | Non-anginal pain | <= 120 mg/dl | Having ST-T wave abnormality |
| 299 | 739 | 52 | 1 | 0 | 128 | 255.0 | 0 | 1 | 161 | 1 | 0.0 | 2 | 1 | 3 | 0 | 50-59 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality |
| 300 | 843 | 59 | 1 | 3 | 160 | 273.0 | 0 | 0 | 125 | 0 | 0.0 | 2 | 0 | 2 | 0 | 50-59 | Male | Asymptomatic | <= 120 mg/dl | Normal |
| 301 | 878 | 54 | 1 | 0 | 120 | 188.0 | 0 | 1 | 113 | 0 | 1.4 | 1 | 1 | 3 | 0 | 50-59 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality |
302 rows × 20 columns
# Row count for every (resting-ECG result, target) pair.
restecg_percentage = (
    df.groupby(["restecg_category", "target"])["target"]
    .count()
    .reset_index(name='total_target')
)

# Total rows per resting-ECG result, broadcast back onto every row of that
# result.
total_counts = restecg_percentage.groupby('restecg_category')['total_target'].transform('sum')

# Share of each target value within its result, as a whole-number percent.
# Scale to percent *before* rounding: round(ratio, 2) * 100 leaves float
# artefacts such as 56.99999999999999, which int() would later truncate to 56.
restecg_percentage['percentage'] = (restecg_percentage['total_target'] / total_counts * 100).round()
restecg_percentage
| restecg_category | target | total_target | percentage | |
|---|---|---|---|---|
| 0 | Having ST-T wave abnormality | 0 | 56 | 37.0 |
| 1 | Having ST-T wave abnormality | 1 | 95 | 63.0 |
| 2 | Left ventricular hypertrophy | 0 | 3 | 75.0 |
| 3 | Left ventricular hypertrophy | 1 | 1 | 25.0 |
| 4 | Normal | 0 | 79 | 54.0 |
| 5 | Normal | 1 | 68 | 46.0 |
import plotly.graph_objects as go

# Bar colours: grey for "no heart disease", light red for "heart disease",
# and a saturated red for the category with the highest heart-disease share.
color_target_0 = '#c5c5c5'
color_target_1 = '#ff6b6b'
highlight_color = '#ff0000'

# Split the percentage table into one frame per target value.
no_heart_disease = restecg_percentage[restecg_percentage['target'] == 0]
heart_disease = restecg_percentage[restecg_percentage['target'] == 1]

# Category with the largest heart-disease percentage
# (None when there are no heart-disease rows at all).
if heart_disease.empty:
    max_category = None
else:
    max_index = heart_disease['percentage'].idxmax()
    max_category = heart_disease.loc[max_index, 'restecg_category']

# One colour per heart-disease bar; the top category is highlighted.
heart_disease_colors = [
    highlight_color if category == max_category else color_target_1
    for category in heart_disease['restecg_category']
]

fig = go.Figure()

# Labels use ":.0f" (round to nearest) rather than int() (truncate), so a
# percentage stored as 56.99999999999999 is shown as 57%, not 56%.

# 'No Heart Disease' segment of each stacked bar.
fig.add_trace(go.Bar(
    y=no_heart_disease['restecg_category'],
    x=no_heart_disease['percentage'],
    name='No Heart Disease',
    orientation='h',
    marker=dict(color=color_target_0),
    text=no_heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}%"),
    textposition='inside',
    textfont=dict(size=15),
    hoverinfo='text',
    hovertext=no_heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}% No Heart Disease")
))

# 'Heart Disease' segment, coloured per category.
fig.add_trace(go.Bar(
    y=heart_disease['restecg_category'],
    x=heart_disease['percentage'],
    name='Heart Disease',
    orientation='h',
    marker=dict(color=heart_disease_colors),
    text=heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}%"),
    textposition='inside',
    textfont=dict(size=15),
    hoverinfo='text',
    hovertext=heart_disease['percentage'].apply(lambda pct: f"{pct:.0f}% Heart Disease")
))

# Layout: stacked bars on a fixed 0-100 % axis, legend centred below the plot.
fig.update_layout(
    title=dict(
        text="Distribution of Heart Disease by Resting Electrocardiographic Results",
        font=dict(size=24, color="black", family="Arial", weight="bold"),
        x=0,
        xanchor="left"
    ),
    xaxis=dict(
        title=None,
        tickvals=[0, 20, 40, 60, 80, 100],
        ticktext=['0%', '20%', '40%', '60%', '80%', '100%'],
        range=[0, 100],
        tickfont=dict(size=15)
    ),
    yaxis=dict(
        title=None,
        tickfont=dict(size=15)
    ),
    barmode='stack',
    plot_bgcolor="white",
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.3,
        xanchor="center",
        x=0.5,
        traceorder="normal",
        font=dict(size=15),
        itemclick="toggleothers",
    )
)

fig.show()
# Code -> label lookups for the remaining coded columns.
exang_mapping = {0: 'No', 1: 'Yes'}
slope_mapping = dict(enumerate(['Upsloping', 'Flat', 'Downsloping']))
ca_mapping = {
    0: 'No vessels colored',
    1: '1 major vessel colored',
    2: '2 major vessels colored',
    3: '3 major vessels colored',
}
thal_mapping = {
    1: 'Normal',
    2: 'Fixed defect',
    3: 'Reversible defect',
}

# Add one "<column>_category" label column per coded column, in the order
# exang, slope, ca, thal.
# NOTE(review): Series.map yields NaN for any code that is not a key of its
# mapping (e.g. a thal code of 0) - confirm no such codes exist in the data.
category_sources = {
    'exang': exang_mapping,
    'slope': slope_mapping,
    'ca': ca_mapping,
    'thal': thal_mapping,
}
for source_col, code_labels in category_sources.items():
    df[f"{source_col}_category"] = df[source_col].map(code_labels)
df
| index | age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | ... | target | age_group | sex_category | cp_category | fbs_category | restecg_category | exang_category | slope_category | ca_category | thal_category | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 52 | 1 | 0 | 125 | 212.0 | 0 | 1 | 168 | 0 | ... | 0 | 50-59 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality | No | Downsloping | 2 major vessels colored | Reversible defect |
| 1 | 1 | 53 | 1 | 0 | 140 | 203.0 | 1 | 0 | 155 | 1 | ... | 0 | 50-59 | Male | Typical angina | > 120 mg/dl | Normal | Yes | Upsloping | No vessels colored | Reversible defect |
| 2 | 2 | 70 | 1 | 0 | 145 | 174.0 | 0 | 1 | 125 | 1 | ... | 0 | 70-79 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality | Yes | Upsloping | No vessels colored | Reversible defect |
| 3 | 3 | 61 | 1 | 0 | 148 | 203.0 | 0 | 1 | 161 | 0 | ... | 0 | 60-69 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality | No | Downsloping | 1 major vessel colored | Reversible defect |
| 4 | 4 | 62 | 0 | 0 | 138 | 294.0 | 1 | 1 | 106 | 0 | ... | 0 | 60-69 | Female | Typical angina | > 120 mg/dl | Having ST-T wave abnormality | No | Flat | 3 major vessels colored | Fixed defect |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 297 | 723 | 68 | 0 | 2 | 120 | 211.0 | 0 | 0 | 115 | 0 | ... | 1 | 60-69 | Female | Non-anginal pain | <= 120 mg/dl | Normal | No | Flat | No vessels colored | Fixed defect |
| 298 | 733 | 44 | 0 | 2 | 108 | 141.0 | 0 | 1 | 175 | 0 | ... | 1 | 40-49 | Female | Non-anginal pain | <= 120 mg/dl | Having ST-T wave abnormality | No | Flat | No vessels colored | Fixed defect |
| 299 | 739 | 52 | 1 | 0 | 128 | 255.0 | 0 | 1 | 161 | 1 | ... | 0 | 50-59 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality | Yes | Downsloping | 1 major vessel colored | Reversible defect |
| 300 | 843 | 59 | 1 | 3 | 160 | 273.0 | 0 | 0 | 125 | 0 | ... | 0 | 50-59 | Male | Asymptomatic | <= 120 mg/dl | Normal | No | Downsloping | No vessels colored | Fixed defect |
| 301 | 878 | 54 | 1 | 0 | 120 | 188.0 | 0 | 1 | 113 | 0 | ... | 0 | 50-59 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality | No | Flat | 1 major vessel colored | Reversible defect |
302 rows × 24 columns
def _target_percentage(frame, category_col):
    """Return target counts per category and their share of that category.

    Args:
        frame: DataFrame containing ``category_col`` and a ``target`` column.
        category_col: Name of the categorical column to break down.

    Returns:
        DataFrame with one row per (category, target) pair and the columns
        ``category_col``, ``target``, ``total_target`` (row count) and
        ``percentage`` (share of the category, as a whole-number percent).
    """
    counts = (
        frame.groupby([category_col, "target"])["target"]
        .count()
        .reset_index(name='total_target')
    )
    # Total rows per category, broadcast back onto every row of that category.
    category_totals = counts.groupby(category_col)['total_target'].transform('sum')
    # Scale to percent *before* rounding: round(ratio, 2) * 100 leaves float
    # artefacts such as 56.99999999999999, which int() would truncate to 56.
    counts['percentage'] = (counts['total_target'] / category_totals * 100).round()
    return counts


# One percentage table per coded clinical column.
exang_percentage = _target_percentage(df, "exang_category")
slope_percentage = _target_percentage(df, "slope_category")
ca_percentage = _target_percentage(df, "ca_category")
thal_percentage = _target_percentage(df, "thal_category")
import plotly.graph_objects as go

# Stacked horizontal bars: share of each target value per exang category.
# Define colors
color_target_0 = '#c5c5c5'
color_target_1 = '#ff6b6b'
highlight_color = '#ff0000'

# Filter data for each target
no_heart_disease = exang_percentage[exang_percentage['target'] == 0]
heart_disease = exang_percentage[exang_percentage['target'] == 1]

# Identify the category with the highest heart disease percentage
if not heart_disease.empty:
    max_index = heart_disease['percentage'].idxmax()
    max_category = heart_disease.loc[max_index, 'exang_category']
else:
    max_category = None

# Assign colors for 'Heart Disease' bars, highlighting the max category
heart_disease_colors = [
    highlight_color if category == max_category else color_target_1
    for category in heart_disease['exang_category']
]

fig = go.Figure()

# One bar trace per target value; both share the same label formatting.
for trace_name, subset, bar_colors in (
    ('No Heart Disease', no_heart_disease, color_target_0),
    ('Heart Disease', heart_disease, heart_disease_colors),
):
    fig.add_trace(go.Bar(
        y=subset['exang_category'],
        x=subset['percentage'],
        name=trace_name,
        orientation='h',
        marker=dict(color=bar_colors),
        # ':.0f' rounds to the nearest integer; int() truncated values such as
        # 56.99999999999999 down to 56.
        text=subset['percentage'].map("{:.0f}%".format),
        textposition='inside',
        textfont=dict(size=15),
        hoverinfo='text',
        hovertext=subset['percentage'].map(lambda pct: f"{pct:.0f}% {trace_name}"),
    ))

fig.update_layout(
    title=dict(
        text="Distribution of Heart Disease by Exercise-Induced Angina ",
        font=dict(size=24, color="black", family="Arial", weight="bold"),
        x=0,
        xanchor="left"
    ),
    xaxis=dict(
        title=None,
        tickvals=[0, 20, 40, 60, 80, 100],
        ticktext=['0%', '20%', '40%', '60%', '80%', '100%'],
        range=[0, 100],
        tickfont=dict(size=15)
    ),
    yaxis=dict(
        title=None,
        tickfont=dict(size=15)
    ),
    barmode='stack',
    plot_bgcolor="white",
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.3,
        xanchor="center",
        x=0.5,
        traceorder="normal",
        font=dict(size=15),
        itemclick="toggleothers",
    )
)
fig.show()
import plotly.graph_objects as go

# Stacked horizontal bars: share of each target value per slope category.
# Define colors
color_target_0 = '#c5c5c5'
color_target_1 = '#ff6b6b'
highlight_color = '#ff0000'

# Filter data for each target
no_heart_disease = slope_percentage[slope_percentage['target'] == 0]
heart_disease = slope_percentage[slope_percentage['target'] == 1]

# Identify the category with the highest heart disease percentage
if not heart_disease.empty:
    max_index = heart_disease['percentage'].idxmax()
    max_category = heart_disease.loc[max_index, 'slope_category']
else:
    max_category = None

# Assign colors for 'Heart Disease' bars, highlighting the max category
heart_disease_colors = [
    highlight_color if category == max_category else color_target_1
    for category in heart_disease['slope_category']
]

fig = go.Figure()

# One bar trace per target value; both share the same label formatting.
for trace_name, subset, bar_colors in (
    ('No Heart Disease', no_heart_disease, color_target_0),
    ('Heart Disease', heart_disease, heart_disease_colors),
):
    fig.add_trace(go.Bar(
        y=subset['slope_category'],
        x=subset['percentage'],
        name=trace_name,
        orientation='h',
        marker=dict(color=bar_colors),
        # ':.0f' rounds to the nearest integer; int() truncated values such as
        # 56.99999999999999 down to 56.
        text=subset['percentage'].map("{:.0f}%".format),
        textposition='inside',
        textfont=dict(size=15),
        hoverinfo='text',
        hovertext=subset['percentage'].map(lambda pct: f"{pct:.0f}% {trace_name}"),
    ))

fig.update_layout(
    title=dict(
        text="Distribution of Heart Disease by The Slope of The Peak Exercise ST Segment",
        font=dict(size=24, color="black", family="Arial", weight="bold"),
        x=0,
        xanchor="left"
    ),
    xaxis=dict(
        title=None,
        tickvals=[0, 20, 40, 60, 80, 100],
        ticktext=['0%', '20%', '40%', '60%', '80%', '100%'],
        range=[0, 100],
        tickfont=dict(size=15)
    ),
    yaxis=dict(
        title=None,
        tickfont=dict(size=15)
    ),
    barmode='stack',
    plot_bgcolor="white",
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.3,
        xanchor="center",
        x=0.5,
        traceorder="normal",
        font=dict(size=15),
        itemclick="toggleothers",
    )
)
fig.show()
import plotly.graph_objects as go

# Stacked horizontal bars: share of each target value per ca category.
# Define colors
color_target_0 = '#c5c5c5'
color_target_1 = '#ff6b6b'
highlight_color = '#ff0000'

# Filter data for each target
no_heart_disease = ca_percentage[ca_percentage['target'] == 0]
heart_disease = ca_percentage[ca_percentage['target'] == 1]

# Identify the category with the highest heart disease percentage
if not heart_disease.empty:
    max_index = heart_disease['percentage'].idxmax()
    max_category = heart_disease.loc[max_index, 'ca_category']
else:
    max_category = None

# Assign colors for 'Heart Disease' bars, highlighting the max category
heart_disease_colors = [
    highlight_color if category == max_category else color_target_1
    for category in heart_disease['ca_category']
]

fig = go.Figure()

# One bar trace per target value; both share the same label formatting.
for trace_name, subset, bar_colors in (
    ('No Heart Disease', no_heart_disease, color_target_0),
    ('Heart Disease', heart_disease, heart_disease_colors),
):
    fig.add_trace(go.Bar(
        y=subset['ca_category'],
        x=subset['percentage'],
        name=trace_name,
        orientation='h',
        marker=dict(color=bar_colors),
        # ':.0f' rounds to the nearest integer; int() truncated values such as
        # 56.99999999999999 down to 56.
        text=subset['percentage'].map("{:.0f}%".format),
        textposition='inside',
        textfont=dict(size=15),
        hoverinfo='text',
        hovertext=subset['percentage'].map(lambda pct: f"{pct:.0f}% {trace_name}"),
    ))

fig.update_layout(
    title=dict(
        text="Distribution of Heart Disease by Major Vessels Colored by Fluoroscopy",
        font=dict(size=24, color="black", family="Arial", weight="bold"),
        x=0,
        xanchor="left"
    ),
    xaxis=dict(
        title=None,
        tickvals=[0, 20, 40, 60, 80, 100],
        ticktext=['0%', '20%', '40%', '60%', '80%', '100%'],
        range=[0, 100],
        tickfont=dict(size=15)
    ),
    yaxis=dict(
        title=None,
        tickfont=dict(size=15)
    ),
    barmode='stack',
    plot_bgcolor="white",
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.3,
        xanchor="center",
        x=0.5,
        traceorder="normal",
        font=dict(size=15),
        itemclick="toggleothers",
    )
)
fig.show()
ca_percentage
| ca_category | target | total_target | percentage | |
|---|---|---|---|---|
| 0 | 1 major vessel colored | 0 | 44 | 68.0 |
| 1 | 1 major vessel colored | 1 | 21 | 32.0 |
| 2 | 2 major vessels colored | 0 | 31 | 82.0 |
| 3 | 2 major vessels colored | 1 | 7 | 18.0 |
| 4 | 3 major vessels colored | 0 | 17 | 85.0 |
| 5 | 3 major vessels colored | 1 | 3 | 15.0 |
| 6 | No vessels colored | 0 | 45 | 26.0 |
| 7 | No vessels colored | 1 | 130 | 74.0 |
import plotly.graph_objects as go

# Stacked horizontal bars: share of each target value per thal category.
# Define colors
color_target_0 = '#c5c5c5'
color_target_1 = '#ff6b6b'
highlight_color = '#ff0000'

# Filter data for each target
no_heart_disease = thal_percentage[thal_percentage['target'] == 0]
heart_disease = thal_percentage[thal_percentage['target'] == 1]

# Identify the category with the highest heart disease percentage
if not heart_disease.empty:
    max_index = heart_disease['percentage'].idxmax()
    max_category = heart_disease.loc[max_index, 'thal_category']
else:
    max_category = None

# Assign colors for 'Heart Disease' bars, highlighting the max category
heart_disease_colors = [
    highlight_color if category == max_category else color_target_1
    for category in heart_disease['thal_category']
]

fig = go.Figure()

# One bar trace per target value; both share the same label formatting.
for trace_name, subset, bar_colors in (
    ('No Heart Disease', no_heart_disease, color_target_0),
    ('Heart Disease', heart_disease, heart_disease_colors),
):
    fig.add_trace(go.Bar(
        y=subset['thal_category'],
        x=subset['percentage'],
        name=trace_name,
        orientation='h',
        marker=dict(color=bar_colors),
        # ':.0f' rounds to the nearest integer; int() truncated values such as
        # 56.99999999999999 down to 56.
        text=subset['percentage'].map("{:.0f}%".format),
        textposition='inside',
        textfont=dict(size=15),
        hoverinfo='text',
        hovertext=subset['percentage'].map(lambda pct: f"{pct:.0f}% {trace_name}"),
    ))

fig.update_layout(
    title=dict(
        text="Distribution of Heart Disease by Thalassemia",
        font=dict(size=24, color="black", family="Arial", weight="bold"),
        x=0,
        xanchor="left"
    ),
    xaxis=dict(
        title=None,
        tickvals=[0, 20, 40, 60, 80, 100],
        ticktext=['0%', '20%', '40%', '60%', '80%', '100%'],
        range=[0, 100],
        tickfont=dict(size=15)
    ),
    yaxis=dict(
        title=None,
        tickfont=dict(size=15)
    ),
    barmode='stack',
    plot_bgcolor="white",
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.3,
        xanchor="center",
        x=0.5,
        traceorder="normal",
        font=dict(size=15),
        itemclick="toggleothers",
    )
)
fig.show()
df
| index | age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | ... | target | age_group | sex_category | cp_category | fbs_category | restecg_category | exang_category | slope_category | ca_category | thal_category | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 52 | 1 | 0 | 125 | 212.0 | 0 | 1 | 168 | 0 | ... | 0 | 50-59 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality | No | Downsloping | 2 major vessels colored | Reversible defect |
| 1 | 1 | 53 | 1 | 0 | 140 | 203.0 | 1 | 0 | 155 | 1 | ... | 0 | 50-59 | Male | Typical angina | > 120 mg/dl | Normal | Yes | Upsloping | No vessels colored | Reversible defect |
| 2 | 2 | 70 | 1 | 0 | 145 | 174.0 | 0 | 1 | 125 | 1 | ... | 0 | 70-79 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality | Yes | Upsloping | No vessels colored | Reversible defect |
| 3 | 3 | 61 | 1 | 0 | 148 | 203.0 | 0 | 1 | 161 | 0 | ... | 0 | 60-69 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality | No | Downsloping | 1 major vessel colored | Reversible defect |
| 4 | 4 | 62 | 0 | 0 | 138 | 294.0 | 1 | 1 | 106 | 0 | ... | 0 | 60-69 | Female | Typical angina | > 120 mg/dl | Having ST-T wave abnormality | No | Flat | 3 major vessels colored | Fixed defect |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 297 | 723 | 68 | 0 | 2 | 120 | 211.0 | 0 | 0 | 115 | 0 | ... | 1 | 60-69 | Female | Non-anginal pain | <= 120 mg/dl | Normal | No | Flat | No vessels colored | Fixed defect |
| 298 | 733 | 44 | 0 | 2 | 108 | 141.0 | 0 | 1 | 175 | 0 | ... | 1 | 40-49 | Female | Non-anginal pain | <= 120 mg/dl | Having ST-T wave abnormality | No | Flat | No vessels colored | Fixed defect |
| 299 | 739 | 52 | 1 | 0 | 128 | 255.0 | 0 | 1 | 161 | 1 | ... | 0 | 50-59 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality | Yes | Downsloping | 1 major vessel colored | Reversible defect |
| 300 | 843 | 59 | 1 | 3 | 160 | 273.0 | 0 | 0 | 125 | 0 | ... | 0 | 50-59 | Male | Asymptomatic | <= 120 mg/dl | Normal | No | Downsloping | No vessels colored | Fixed defect |
| 301 | 878 | 54 | 1 | 0 | 120 | 188.0 | 0 | 1 | 113 | 0 | ... | 0 | 50-59 | Male | Typical angina | <= 120 mg/dl | Having ST-T wave abnormality | No | Flat | 1 major vessel colored | Reversible defect |
302 rows × 24 columns
# Pairwise correlation of the three continuous measurements, drawn as an
# annotated heatmap with the diverging colour scale centred on zero.
continuous_columns = ['trestbps', 'chol', 'thalach']
data = df[continuous_columns]
corrmat = data.corr()

plt.figure(figsize=(10, 5))
sns.heatmap(data=corrmat, annot=True, cmap='RdYlGn', center=0)
<Axes: >
# Variance inflation factor per column of `data`, computed on a design
# matrix that includes an intercept column added by sm.add_constant.
data_with_intercept = sm.add_constant(data)
vif_data = pd.DataFrame()
vif_data["Variable"] = data_with_intercept.columns
vif_data["VIF"] = [
    variance_inflation_factor(data_with_intercept.values, column_position)
    for column_position in range(data_with_intercept.shape[1])
]
# NOTE(review): the intercept column gets a row too; its value is not a
# collinearity measure for a feature - confirm whether it should be plotted.

plt.figure(figsize=(10, 6))
# `palette` without `hue` is deprecated in seaborn (see the FutureWarning this
# cell used to emit); colouring by the y variable with the legend disabled
# gives the same picture.
sns.barplot(
    x='VIF',
    y='Variable',
    hue='Variable',
    data=vif_data,
    palette='viridis',
    legend=False,
)
plt.title('Variance Inflation Factor (VIF) for Each Variable', fontsize=15)
plt.xlabel('VIF', fontsize=12)
plt.ylabel('Variable', fontsize=12)
plt.show()
/tmp/ipykernel_3504/3758902526.py:9: FutureWarning: Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.
# Elbow diagnostic for the number of clusters.
# The search runs on standardised features because every later KMeans fit in
# this notebook is trained on StandardScaler output; fitting the elbow on the
# raw columns would evaluate a different distance geometry than the model
# that is actually used. The estimator is seeded so the curve is reproducible.
elbow_features = StandardScaler().fit_transform(data)
Elbow_M = KElbowVisualizer(KMeans(n_init=10, random_state=111), k=10)
# fit() returns the visualizer itself; the name `labels` is kept as-is.
labels = Elbow_M.fit(elbow_features)
labels.show()
<Axes: title={'center': 'Distortion Score Elbow for KMeans Clustering'}, xlabel='k', ylabel='distortion score'>
# Standardise the three clustering features and keep them as a DataFrame.
features = ['trestbps', 'thalach', 'chol']
X = df[features]
scaler = StandardScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=features)

# Score KMeans for k = 2..10 with two internal validity measures.
cluster_range = range(2, 11)
silhouette_scores = []
davies_bouldin_scores = []
for n_clusters in cluster_range:
    kmeans = KMeans(
        n_clusters=n_clusters,
        init='k-means++',
        n_init=10,
        max_iter=300,
        random_state=111,
    )
    labels = kmeans.fit_predict(X_scaled)
    # Silhouette score for this k
    silhouette_scores.append(silhouette_score(X_scaled, labels))
    # Davies-Bouldin index for this k
    davies_bouldin_scores.append(davies_bouldin_score(X_scaled, labels))

# Collect the results in one table, one row per k.
score_columns = {
    'Number of Clusters': cluster_range,
    'Silhouette Score': silhouette_scores,
    'Davies-Bouldin Index': davies_bouldin_scores,
}
results_df = pd.DataFrame(score_columns)
ranked_results = results_df.sort_values(by="Silhouette Score", ascending=False)
ranked_results.style.background_gradient(cmap='Greys')
| Number of Clusters | Silhouette Score | Davies-Bouldin Index | |
|---|---|---|---|
| 2 | 4 | 0.267604 | 1.136922 |
| 4 | 6 | 0.264203 | 1.100376 |
| 0 | 2 | 0.262386 | 1.583724 |
| 1 | 3 | 0.254036 | 1.289038 |
| 6 | 8 | 0.251404 | 1.088724 |
| 3 | 5 | 0.249821 | 1.102119 |
| 7 | 9 | 0.246821 | 1.099206 |
| 8 | 10 | 0.239248 | 1.100932 |
| 5 | 7 | 0.235945 | 1.112547 |
# Plot Silhouette Score and Davies-Bouldin Index against k, side by side.
plt.figure(figsize=(14, 7))
metric_panels = (
    ('Silhouette Score', silhouette_scores, 'b'),
    ('Davies-Bouldin Index', davies_bouldin_scores, 'r'),
)
for panel_position, (metric_name, metric_values, line_color) in enumerate(metric_panels, start=1):
    plt.subplot(1, 2, panel_position)
    plt.plot(cluster_range, metric_values, marker='o', linestyle='-', color=line_color)
    plt.title(f'{metric_name} vs Number of Clusters')
    plt.xlabel('Number of Clusters')
    plt.ylabel(metric_name)
    plt.xticks(cluster_range)
    plt.grid(True)
plt.tight_layout()
plt.show()
# 1. Preprocessing: standardise the clustering features
features = ['trestbps', 'thalach', 'chol']
X = df[features]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 2. Fit the final 4-cluster model and store each row's cluster id.
# NOTE(review): the seed and n_init here differ from the k-selection sweep
# (random_state=111, n_init=10). They are left unchanged because the numeric
# cluster ids are mapped to hand-written names further down.
kmeans = KMeans(n_clusters=4, random_state=42)
df['cluster'] = kmeans.fit_predict(X_scaled)

# 3. Cluster centres, converted back to the original measurement units.
centers_scaled = kmeans.cluster_centers_
centers = scaler.inverse_transform(centers_scaled)
centers_df = pd.DataFrame(centers, columns=features)
centers_df['cluster'] = range(4)

# 4. Data points: colour = cluster, symbol = target.
fig = px.scatter_3d(
    df,
    x='trestbps',
    y='thalach',
    z='chol',
    color=df['cluster'].astype(str),
    symbol=df['target'].astype(str),
    color_discrete_sequence=['green', 'orange', 'blue', 'red'],
    size_max=10,
    hover_data={
        'trestbps': True,
        'thalach': True,
        'chol': True,
        'target': True,
        'cluster': True
    },
    labels={
        'trestbps': 'Tekanan Darah Istirahat (trestbps)',
        'thalach': 'Maksimum Detak Jantung (thalach)',
        'chol': 'Kolesterol (chol)',
        'target': 'Target',
        'cluster': 'Cluster'
    },
    title='3D Scatter Plot dengan K-Means Clustering (4 Cluster)',
    width=800,
    height=1000
)
# Style the data points BEFORE the centroid trace exists: update_traces with
# no selector touches every trace and would shrink the centroids as well.
fig.update_traces(marker=dict(size=5, opacity=0.8))

# 5. Cluster centres. A single Scatter3d trace carries all of them; taking
# `.data[0]` from a px figure split by symbol added only the first centre.
fig.add_trace(
    go.Scatter3d(
        x=centers_df['trestbps'],
        y=centers_df['thalach'],
        z=centers_df['chol'],
        mode='markers',
        name='Centroid',
        marker=dict(color='black', size=12, symbol='diamond', opacity=1),
        customdata=centers_df[['cluster']],
        hovertemplate=(
            'Cluster=%{customdata[0]}<br>'
            'Tekanan Darah Istirahat (trestbps)=%{x}<br>'
            'Maksimum Detak Jantung (thalach)=%{y}<br>'
            'Kolesterol (chol)=%{z}<extra></extra>'
        ),
    )
)

# 6. Layout adjustments for readability
fig.update_layout(
    scene=dict(
        xaxis=dict(title='Tekanan Darah Istirahat (trestbps)', showticklabels=False, backgroundcolor="rgb(230, 230, 230)"),
        yaxis=dict(title='Maksimum Detak Jantung (thalach)', showticklabels=False, backgroundcolor="rgb(230, 230, 230)"),
        zaxis=dict(title='Kolesterol (chol)', showticklabels=False, backgroundcolor="rgb(230, 230, 230)"),
        camera=dict(
            eye=dict(x=1.5, y=1.5, z=1.5)
        )
    ),
    legend=dict(
        title='Keterangan',
        x=0.85,
        y=0.95
    ),
    title=dict(
        x=0.5,
        y=0.95,
        xanchor='center',
        yanchor='top'
    )
)
fig.show()
import plotly.express as px

# Axis titles looked up by feature name; each figure labels only the two
# features it plots.
_AXIS_TITLES = {
    'trestbps': 'Tekanan Darah Istirahat (trestbps)',
    'thalach': 'Maksimum Detak Jantung (thalach)',
    'chol': 'Kolesterol (chol)',
}


def _cluster_scatter_2d(x_feature, y_feature):
    """Build a 2D scatter of two features, coloured by cluster and marked by target."""
    return px.scatter(
        df,
        x=x_feature,
        y=y_feature,
        color=df['cluster'].astype(str),
        symbol=df['target'].astype(str),
        color_discrete_sequence=['green', 'orange', 'blue', 'red'],
        hover_data={
            column: True
            for column in ('trestbps', 'thalach', 'chol', 'target', 'cluster')
        },
        labels={
            x_feature: _AXIS_TITLES[x_feature],
            y_feature: _AXIS_TITLES[y_feature],
            'target': 'Target',
            'cluster': 'Cluster',
        },
        title=f'2D Scatter Plot: {x_feature} vs {y_feature}',
    )


# 2D scatter plot: trestbps vs thalach
fig1 = _cluster_scatter_2d('trestbps', 'thalach')
fig1.show()

# 2D scatter plot: trestbps vs chol
fig2 = _cluster_scatter_2d('trestbps', 'chol')
fig2.show()

# 2D scatter plot: thalach vs chol
fig3 = _cluster_scatter_2d('thalach', 'chol')
fig3.show()
# Descriptive statistics per cluster
by_cluster = df.groupby('cluster')
cluster_summary = by_cluster.describe()

# Compact view limited to the three clustering features. Despite the variable
# name, the statistics computed are min, max, median and std.
summary_features = ['trestbps', 'thalach', 'chol']
summary_statistics = ['min', 'max', 'median', 'std']
cluster_summary_mean_std = df.groupby('cluster')[summary_features].agg(summary_statistics)
cluster_summary_mean_std
| trestbps | thalach | chol | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| min | max | median | std | min | max | median | std | min | max | median | std | |
| cluster | ||||||||||||
| 0 | 94 | 150 | 125.0 | 12.556069 | 142 | 202 | 165.0 | 13.170838 | 126.0 | 271.0 | 223.0 | 27.786386 |
| 1 | 100 | 164 | 126.0 | 13.633521 | 71 | 143 | 122.0 | 14.935229 | 131.0 | 294.0 | 216.0 | 39.440937 |
| 2 | 100 | 160 | 130.0 | 10.601027 | 109 | 182 | 153.0 | 15.249750 | 253.0 | 417.0 | 303.0 | 37.112065 |
| 3 | 140 | 200 | 158.0 | 13.287866 | 108 | 195 | 150.0 | 18.367360 | 164.0 | 327.0 | 246.5 | 35.126418 |
Analisis Setiap Cluster

Cluster 0:
- Tekanan Darah Istirahat (trestbps): 94 - 150
- Detak Jantung Maksimum (thalach): 142 - 202
- Kolesterol (chol): 126 - 271
- Karakteristik: Detak jantung maksimum yang tinggi (sampai 202) dan kolesterol yang rendah hingga sedang (126-271).
- Nama Usulan: "Cluster Detak Jantung Tinggi"

Cluster 1:
- Tekanan Darah Istirahat (trestbps): 100 - 164
- Detak Jantung Maksimum (thalach): 71 - 143
- Kolesterol (chol): 131 - 294
- Karakteristik: Detak jantung maksimum yang lebih rendah (maksimum 143) dengan rentang kolesterol yang sedang (131-294).
- Nama Usulan: "Cluster Kolesterol Sedang dan Detak Jantung Rendah"

Cluster 2:
- Tekanan Darah Istirahat (trestbps): 100 - 160
- Detak Jantung Maksimum (thalach): 109 - 182
- Kolesterol (chol): 253 - 417
- Karakteristik: Kolesterol tinggi (253-417) dengan detak jantung maksimum menengah.
- Nama Usulan: "Cluster Kolesterol Tinggi"

Cluster 3:
- Tekanan Darah Istirahat (trestbps): 140 - 200
- Detak Jantung Maksimum (thalach): 108 - 195
- Kolesterol (chol): 164 - 327
- Karakteristik: Tekanan darah istirahat yang tinggi (140-200) dan rentang kolesterol yang lebih tinggi.
- Nama Usulan: "Cluster Tekanan Darah Tinggi"
# Human-readable name for each cluster id, chosen from the descriptive
# statistics; the position in the list is the cluster id it names.
cluster_names = dict(enumerate([
    'Cluster Detak Jantung Tinggi',
    'Cluster Kolesterol Sedang dan Detak Jantung Rendah',
    'Cluster Kolesterol Tinggi',
    'Cluster Tekanan Darah Tinggi',
]))

# Add a column holding the cluster name of every row
df['cluster_name'] = df['cluster'].map(cluster_names)

# Show the clustering features next to the assigned cluster and its name
preview_columns = ['trestbps', 'thalach', 'chol', 'cluster', 'cluster_name']
df[preview_columns]
| trestbps | thalach | chol | cluster | cluster_name | |
|---|---|---|---|---|---|
| 0 | 125 | 168 | 212.0 | 0 | Cluster Detak Jantung Tinggi |
| 1 | 140 | 155 | 203.0 | 0 | Cluster Detak Jantung Tinggi |
| 2 | 145 | 125 | 174.0 | 1 | Cluster Kolesterol Sedang dan Detak Jantung Re... |
| 3 | 148 | 161 | 203.0 | 3 | Cluster Tekanan Darah Tinggi |
| 4 | 138 | 106 | 294.0 | 1 | Cluster Kolesterol Sedang dan Detak Jantung Re... |
| ... | ... | ... | ... | ... | ... |
| 297 | 120 | 115 | 211.0 | 1 | Cluster Kolesterol Sedang dan Detak Jantung Re... |
| 298 | 108 | 175 | 141.0 | 0 | Cluster Detak Jantung Tinggi |
| 299 | 128 | 161 | 255.0 | 0 | Cluster Detak Jantung Tinggi |
| 300 | 160 | 125 | 273.0 | 3 | Cluster Tekanan Darah Tinggi |
| 301 | 120 | 113 | 188.0 | 1 | Cluster Kolesterol Sedang dan Detak Jantung Re... |
302 rows × 5 columns
# Keep only the outcome and the cluster name for the per-cluster tally.
outcome_columns = ["target", "cluster_name"]
df_numeric = df[outcome_columns]
df_numeric.head()
| target | cluster_name | |
|---|---|---|
| 0 | 0 | Cluster Detak Jantung Tinggi |
| 1 | 0 | Cluster Detak Jantung Tinggi |
| 2 | 0 | Cluster Kolesterol Sedang dan Detak Jantung Re... |
| 3 | 0 | Cluster Tekanan Darah Tinggi |
| 4 | 0 | Cluster Kolesterol Sedang dan Detak Jantung Re... |
# Number of rows for every (cluster name, target) combination.
rows_per_cluster_and_target = df_numeric.groupby(["cluster_name", "target"])["target"].count()
df_summary = rows_per_cluster_and_target.reset_index(name='total_target')
df_summary
| cluster_name | target | total_target | |
|---|---|---|---|
| 0 | Cluster Detak Jantung Tinggi | 0 | 29 |
| 1 | Cluster Detak Jantung Tinggi | 1 | 88 |
| 2 | Cluster Kolesterol Sedang dan Detak Jantung Re... | 0 | 47 |
| 3 | Cluster Kolesterol Sedang dan Detak Jantung Re... | 1 | 20 |
| 4 | Cluster Kolesterol Tinggi | 0 | 36 |
| 5 | Cluster Kolesterol Tinggi | 1 | 34 |
| 6 | Cluster Tekanan Darah Tinggi | 0 | 26 |
| 7 | Cluster Tekanan Darah Tinggi | 1 | 22 |
# Pivot to one row per cluster with a count column for each target value.
# reindex pins the column order to [0, 1] and creates a column that is
# missing altogether; fillna(0) covers clusters that lack one outcome.
# Without both, the positional rename below mislabels or fails, and the NaN
# counts break the integer conversion of the probability.
pivot_df = (
    df_summary
    .pivot(index='cluster_name', columns='target', values='total_target')
    .reindex(columns=[0, 1])
    .fillna(0)
    .astype(int)
    .reset_index()
)
pivot_df.columns = ['cluster_name', 'no_heart_disease', 'heart_disease']

# Total number of cases in each cluster
pivot_df['total_cases'] = pivot_df['no_heart_disease'] + pivot_df['heart_disease']

# Share of heart-disease cases in each cluster, as a whole-number percentage
pivot_df['heart_disease_probability (%)'] = (
    (pivot_df['heart_disease'] / pivot_df['total_cases'] * 100).round(0).astype(int)
)

# Show the clusters ordered from highest to lowest share
pivot_df_sorted = pivot_df[['cluster_name', 'heart_disease_probability (%)']].sort_values(
    by="heart_disease_probability (%)", ascending=False
)
pivot_df_styled = pivot_df_sorted.style.background_gradient(cmap='Reds')
pivot_df_styled
| cluster_name | heart_disease_probability (%) | |
|---|---|---|
| 0 | Cluster Detak Jantung Tinggi | 75 |
| 2 | Cluster Kolesterol Tinggi | 49 |
| 3 | Cluster Tekanan Darah Tinggi | 46 |
| 1 | Cluster Kolesterol Sedang dan Detak Jantung Rendah | 30 |
df.columns
Index(['index', 'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target',
'age_group', 'sex_category', 'cp_category', 'fbs_category',
'restecg_category', 'exang_category', 'slope_category', 'ca_category',
'thal_category', 'cluster', 'cluster_name'],
dtype='object')
df["oldpeak"]
0 1.0
1 3.1
2 2.6
3 0.0
4 1.9
...
297 1.5
298 0.6
299 0.0
300 0.0
301 1.4
Name: oldpeak, Length: 302, dtype: float64
import plotly.graph_objects as go

# Height of the dashed reference line and the number shown in its label.
# Kept in one place so the line and the annotation text cannot drift apart.
# NOTE(review): the value is entered by hand; a later cell computes the mean
# heart-disease count per oldpeak category - confirm the two stay in sync.
heart_disease_avg_count = 18

fig = go.Figure()

# One histogram per target value: grey = no heart disease, red = heart disease.
for target_value, trace_name, bar_color in (
    (0, 'No Heart Disease', 'grey'),
    (1, 'Heart Disease', '#C62E2E'),
):
    fig.add_trace(go.Histogram(
        x=df[df['target'] == target_value]['oldpeak'],
        marker=dict(color=bar_color),
        name=trace_name,
        xbins=dict(size=0.5)  # adjust the bin width if needed
    ))

# Add dashed line for average count (spans the full plot width)
fig.add_shape(
    type="line",
    x0=0, x1=1, y0=heart_disease_avg_count, y1=heart_disease_avg_count,
    xref="paper", yref="y",
    line=dict(color="#4A4947", width=2, dash="dash")
)

# Update layout
fig.update_layout(
    title=dict(
        text="Heart Disease Count by Oldpeak",
        font=dict(size=20, color="black", family="Arial", weight="bold"),
        x=0.065,
        xanchor="left"
    ),
    xaxis=dict(
        title="Old Peak",
        title_font=dict(size=14, weight="bold"),
        tickmode="linear",
        dtick=0.5,
        showline=True,
        linecolor="black",
        linewidth=2
    ),
    yaxis=dict(
        title=None,
        showline=True,
        showticklabels=False,
        linewidth=2,
    ),
    plot_bgcolor="white",
    bargap=0.2,
    barmode='group'
)

# Add hover template for custom text on hover
fig.update_traces(
    hovertemplate='Old Peak Range: %{x}<br>Count: %{y}',
)

# Explanatory note above the plot area
fig.add_annotation(
    text="The old peak level in certain ranges is more likely<br>to be associated with heart disease cases.<br>This histogram shows distribution across cases.",
    xref='paper',
    yref='paper',
    x=0.345,
    y=1.1,
    showarrow=False,
    xanchor="right",
    font=dict(size=12, color='black'),
    align='left'
)

# Label for the dashed average line (position is in paper coordinates)
fig.add_annotation(
    text=f'<b>Heart Disease avg ({heart_disease_avg_count})</b> ',
    xref='paper',
    yref='paper',
    x=1.03,
    y=0.23,
    showarrow=False,
    xanchor="right",
    font=dict(size=12, color='#4A4947'),
    align='left'
)

# Show plot
fig.show()
def categorize_oldpeak(value):
    """Return the label of the 0.5-wide oldpeak bucket that contains ``value``.

    Buckets are centred on multiples of 0.5 (0.0, 0.5, ..., 6.0) and are
    labelled with their one-decimal bounds, e.g. ``'0.8-1.2'``. Values that
    fall between two labelled ranges (such as 0.25 or 1.75) are assigned to
    the bucket whose centre is nearest instead of being reported as out of
    range. Anything below -0.25, above 6.0, or NaN yields ``'Out of Range'``.
    """
    bucket_labels = (
        '-0.2-0.2', '0.3-0.7', '0.8-1.2', '1.3-1.7', '1.8-2.2',
        '2.3-2.7', '2.8-3.2', '3.3-3.7', '3.8-4.2', '4.3-4.7',
        '4.8-5.2', '5.3-5.7', '5.8-6.0',
    )
    # Negating the chained comparison also sends NaN here, because every
    # comparison involving NaN is False.
    if not -0.25 <= value <= 6.0:
        return 'Out of Range'
    # Shifting by half a bucket turns "nearest centre" into a floor division.
    return bucket_labels[int((value + 0.25) // 0.5)]
# Label every row with its oldpeak bucket and store it as a new column.
oldpeak_values = df['oldpeak']
df['oldpeak_category'] = oldpeak_values.map(categorize_oldpeak)
df['oldpeak_category']
0 0.8-1.2
1 2.8-3.2
2 2.3-2.7
3 -0.2-0.2
4 1.8-2.2
...
297 1.3-1.7
298 0.3-0.7
299 -0.2-0.2
300 -0.2-0.2
301 1.3-1.7
Name: oldpeak_category, Length: 302, dtype: object
# Mean number of heart-disease rows (target == 1) per oldpeak category.
oldpeak_counts = (
    df.groupby(["oldpeak_category", "target"])["target"]
    .count()
    .reset_index(name='total_oldpeak_category')
)
heart_disease_counts = oldpeak_counts.loc[
    oldpeak_counts["target"] == 1, 'total_oldpeak_category'
]
oldpeak_target_avg = heart_disease_counts.mean()
# The message used to say "di setiap usia" (per age), copied from an age
# analysis; the mean is taken over oldpeak categories.
f'Nilai rata-rata dari pengidap serangan jantung di setiap kategori oldpeak {round(oldpeak_target_avg,0)}'
'Nilai rata-rata dari pengidap serangan jantung di setiap usia 18.0'